From: Boyuan Yang Date: Sun, 19 Sep 2021 00:22:17 +0000 (-0400) Subject: debian/patches: Drop patch 0006/0007 (merged upstream) X-Git-Tag: archive/raspbian/1.1.9+ds1-4+rpi1^2^2~69 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks:///%22http:/www.example.com/cgi/%22https:/%22bookmarks:/?a=commitdiff_plain;h=81f6b5bbc2f666464033b459b238391f251fe6a8;p=opencc.git debian/patches: Drop patch 0006/0007 (merged upstream) --- diff --git a/debian/patches/0006-Fix-a-bug-in-the-calculation-of-DictGroup-keyMaxLeng.patch b/debian/patches/0006-Fix-a-bug-in-the-calculation-of-DictGroup-keyMaxLeng.patch deleted file mode 100644 index d89f9a3..0000000 --- a/debian/patches/0006-Fix-a-bug-in-the-calculation-of-DictGroup-keyMaxLeng.patch +++ /dev/null @@ -1,97 +0,0 @@ -From: Carbo Kuo -Date: Thu, 25 Feb 2021 20:48:50 +0900 -Subject: Fix a bug in the calculation of DictGroup::keyMaxLength. - -The length should be the maximum of all sub-dictionaries in the dictionary group. ---- - src/DictGroup.cpp | 16 ++++++++++++++-- - src/DictGroupTest.cpp | 32 ++++++++++++++++++++++++-------- - 2 files changed, 38 insertions(+), 10 deletions(-) - -diff --git a/src/DictGroup.cpp b/src/DictGroup.cpp -index 4ca9e33..c682e96 100644 ---- a/src/DictGroup.cpp -+++ b/src/DictGroup.cpp -@@ -1,7 +1,7 @@ - /* - * Open Chinese Convert - * -- * Copyright 2010-2014 Carbo Kuo -+ * Copyright 2010-2021 Carbo Kuo - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -24,8 +24,20 @@ - - using namespace opencc; - -+namespace { -+ -+size_t GetKeyMaxLength(const std::list& dicts) { -+ size_t keyMaxLength = 0; -+ for (const DictPtr& dict : dicts) { -+ keyMaxLength = std::max(keyMaxLength, dict->KeyMaxLength()); -+ } -+ return keyMaxLength; -+} -+ -+} // namespace -+ - DictGroup::DictGroup(const std::list& _dicts) -- : keyMaxLength(0), dicts(_dicts) {} -+ : keyMaxLength(GetKeyMaxLength(_dicts)), dicts(_dicts) {} - - DictGroup::~DictGroup() {} - -diff --git a/src/DictGroupTest.cpp b/src/DictGroupTest.cpp -index 7003506..c91731e 100644 ---- a/src/DictGroupTest.cpp -+++ b/src/DictGroupTest.cpp -@@ -1,7 +1,7 @@ - /* - * Open Chinese Convert - * -- * Copyright 2015 Carbo Kuo -+ * Copyright 2015-2021 Carbo Kuo - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. -@@ -25,15 +25,31 @@ protected: - DictGroupTest() {} - }; - --TEST_F(DictGroupTest, SimpleGroupTest) { -+TEST_F(DictGroupTest, KeyMaxLength) { - const DictGroupPtr& dictGroup = CreateDictGroupForConversion(); -- const auto& entry = dictGroup->Dict::MatchPrefix(utf8("Unknown")); -- EXPECT_TRUE(entry.IsNull()); -+ EXPECT_EQ(6, dictGroup->KeyMaxLength()); -+ EXPECT_EQ(6, dictGroup->GetDicts().front()->KeyMaxLength()); -+ EXPECT_EQ(3, dictGroup->GetDicts().back()->KeyMaxLength()); -+} - -- const auto& matches = dictGroup->Dict::MatchAllPrefixes(utf8("干燥")); -- EXPECT_EQ(2, matches.size()); -- EXPECT_EQ(utf8("乾燥"), matches.at(0)->GetDefault()); -- EXPECT_EQ(utf8("幹"), matches.at(1)->GetDefault()); -+TEST_F(DictGroupTest, SimpleGroupTest) { -+ const DictGroupPtr& dictGroup = CreateDictGroupForConversion(); -+ { -+ const auto& entry = dictGroup->Dict::MatchPrefix(utf8("Unknown")); -+ EXPECT_TRUE(entry.IsNull()); -+ } -+ { -+ const auto& entry = dictGroup->Dict::MatchPrefix(utf8("里面")); -+ EXPECT_FALSE(entry.IsNull()); -+ EXPECT_EQ(3, entry.Get()->KeyLength()); -+ EXPECT_EQ(utf8("裏"), entry.Get()->GetDefault()); -+ } -+ { -+ const auto& matches = dictGroup->Dict::MatchAllPrefixes(utf8("干燥")); -+ EXPECT_EQ(2, matches.size()); -+ EXPECT_EQ(utf8("乾燥"), matches.at(0)->GetDefault()); -+ EXPECT_EQ(utf8("幹"), matches.at(1)->GetDefault()); -+ } - } - - TEST_F(DictGroupTest, TaiwanPhraseGroupTest) { diff --git a/debian/patches/0007-Fix-a-severe-performance-bug-in-Conversion-Convert-t.patch b/debian/patches/0007-Fix-a-severe-performance-bug-in-Conversion-Convert-t.patch deleted file mode 100644 index a614830..0000000 --- a/debian/patches/0007-Fix-a-severe-performance-bug-in-Conversion-Convert-t.patch +++ /dev/null @@ -1,188 +0,0 @@ -From: Carbo Kuo -Date: Thu, 25 Feb 2021 21:13:38 +0900 -Subject: Fix a severe performance bug in `Conversion::Convert` that caused - O(N^2) complexity. - -In `Conversion.cpp`, line 27: -``` - Optional matched = dict->MatchPrefix(pstr); -``` -pstr is a `const char*`. However, there is no overloaded function which parameter `const char*`. -Therefore it matches `Optional MatchPrefix(const std::string& word) const`. -There is an implicit type conversion from `char*` to `std::string` with time complexity O(N). - -I added new benchmark tests. Before the fix: - -1: ------------------------------------------------------------------ -1: Benchmark Time CPU Iterations -1: ------------------------------------------------------------------ -1: BM_Initialization/hk2s 1.17 ms 1.12 ms 645 -1: BM_Initialization/hk2t 0.116 ms 0.116 ms 5922 -1: BM_Initialization/jp2t 0.206 ms 0.201 ms 3500 -1: BM_Initialization/s2hk 18.2 ms 17.9 ms 40 -1: BM_Initialization/s2t 18.2 ms 18.1 ms 39 -1: BM_Initialization/s2tw 17.9 ms 17.8 ms 39 -1: BM_Initialization/s2twp 18.6 ms 18.4 ms 39 -1: BM_Initialization/t2hk 0.055 ms 0.054 ms 12907 -1: BM_Initialization/t2jp 0.120 ms 0.117 ms 5978 -1: BM_Initialization/t2s 0.988 ms 0.984 ms 710 -1: BM_Initialization/tw2s 1.08 ms 1.05 ms 672 -1: BM_Initialization/tw2sp 1.26 ms 1.24 ms 563 -1: BM_Initialization/tw2t 0.088 ms 0.083 ms 8528 -1: BM_Convert2M 413 ms 413 ms 2 -1: BM_Convert/100 1.09 ms 1.09 ms 629 -1: BM_Convert/1000 33.2 ms 33.2 ms 21 -1: BM_Convert/10000 2822 ms 2822 ms 1 -1: BM_Convert/100000 (took longer than 5 minutes, killed) - -Now: -1: ------------------------------------------------------------------ -1: Benchmark Time CPU Iterations -1: ------------------------------------------------------------------ -1: BM_Initialization/hk2s 1.07 ms 1.07 ms 650 -1: BM_Initialization/hk2t 0.114 ms 0.114 ms 6092 -1: BM_Initialization/jp2t 0.204 ms 0.200 ms 3503 -1: BM_Initialization/s2hk 18.2 ms 18.0 ms 40 -1: BM_Initialization/s2t 17.6 ms 17.6 ms 39 -1: BM_Initialization/s2tw 18.0 ms 17.9 ms 40 -1: BM_Initialization/s2twp 17.9 ms 17.9 ms 39 -1: BM_Initialization/t2hk 0.055 ms 0.055 ms 12855 -1: BM_Initialization/t2jp 0.125 ms 0.124 ms 5772 -1: BM_Initialization/t2s 1.000 ms 0.989 ms 695 -1: BM_Initialization/tw2s 1.09 ms 1.07 ms 668 -1: BM_Initialization/tw2sp 1.29 ms 1.26 ms 558 -1: BM_Initialization/tw2t 0.082 ms 0.082 ms 8558 -1: BM_Convert2M 361 ms 361 ms 2 -1: BM_Convert/100 0.741 ms 0.740 ms 948 -1: BM_Convert/1000 7.54 ms 7.52 ms 94 -1: BM_Convert/10000 76.3 ms 76.3 ms 9 -1: BM_Convert/100000 786 ms 786 ms 1 - -This bug was reported in https://github.com/BYVoid/OpenCC/issues/478 and https://github.com/BYVoid/OpenCC/issues/517. ---- - src/Dict.hpp | 7 ++++ - src/benchmark/Performance.cpp | 77 ++++++++++++++++++++++++++++++++++--------- - 2 files changed, 69 insertions(+), 15 deletions(-) - -diff --git a/src/Dict.hpp b/src/Dict.hpp -index 461d6d2..1c81034 100644 ---- a/src/Dict.hpp -+++ b/src/Dict.hpp -@@ -49,6 +49,13 @@ public: - virtual Optional MatchPrefix(const char* word, - size_t len) const; - -+ /** -+ * Matches the longest matched prefix of a word. -+ */ -+ Optional MatchPrefix(const char* word) const { -+ return MatchPrefix(word, KeyMaxLength()); -+ } -+ - /** - * Matches the longest matched prefix of a word. - */ -diff --git a/src/benchmark/Performance.cpp b/src/benchmark/Performance.cpp -index cf8d3aa..d1b6468 100644 ---- a/src/benchmark/Performance.cpp -+++ b/src/benchmark/Performance.cpp -@@ -1,7 +1,26 @@ -+/* -+ * Open Chinese Convert -+ * -+ * Copyright 2020-2021 Carbo Kuo -+ * -+ * Licensed under the Apache License, Version 2.0 (the "License"); -+ * you may not use this file except in compliance with the License. -+ * You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+ - #include - #include - #include - #include -+#include - #include - - #ifdef _MSC_VER -@@ -44,21 +63,32 @@ static void BM_Initialization(benchmark::State& state, - state.ResumeTiming(); - } - } --BENCHMARK_CAPTURE(BM_Initialization, hk2s, "hk2s"); --BENCHMARK_CAPTURE(BM_Initialization, hk2t, "hk2t"); --BENCHMARK_CAPTURE(BM_Initialization, jp2t, "jp2t"); --BENCHMARK_CAPTURE(BM_Initialization, s2hk, "s2hk"); --BENCHMARK_CAPTURE(BM_Initialization, s2t, "s2t"); --BENCHMARK_CAPTURE(BM_Initialization, s2tw, "s2tw"); --BENCHMARK_CAPTURE(BM_Initialization, s2twp, "s2twp"); --BENCHMARK_CAPTURE(BM_Initialization, t2hk, "t2hk"); --BENCHMARK_CAPTURE(BM_Initialization, t2jp, "t2jp"); --BENCHMARK_CAPTURE(BM_Initialization, t2s, "t2s"); --BENCHMARK_CAPTURE(BM_Initialization, tw2s, "tw2s"); --BENCHMARK_CAPTURE(BM_Initialization, tw2sp, "tw2sp"); --BENCHMARK_CAPTURE(BM_Initialization, tw2t, "tw2t"); -+BENCHMARK_CAPTURE(BM_Initialization, hk2s, "hk2s") -+ ->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, hk2t, "hk2t") -+ ->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, jp2t, "jp2t") -+ ->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, s2hk, "s2hk") -+ ->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, s2t, "s2t")->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, s2tw, "s2tw") -+ ->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, s2twp, "s2twp") -+ ->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, t2hk, "t2hk") -+ ->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, t2jp, "t2jp") -+ ->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, t2s, "t2s")->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, tw2s, "tw2s") -+ ->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, tw2sp, "tw2sp") -+ ->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Initialization, tw2t, "tw2t") -+ ->Unit(benchmark::kMillisecond); - --static void BM_Convert(benchmark::State& state) { -+static void BM_Convert2M(benchmark::State& state) { - const std::string config_name = "s2t"; - const std::string text = ReadText("zuozhuan.txt"); - const std::unique_ptr converter(Initialize(config_name)); -@@ -66,7 +96,24 @@ static void BM_Convert(benchmark::State& state) { - Convert(converter.get(), text); - } - } --BENCHMARK(BM_Convert)->Unit(benchmark::kMillisecond); -+BENCHMARK(BM_Convert2M)->Unit(benchmark::kMillisecond); -+ -+static void BM_Convert(benchmark::State& state, int iteration) { -+ std::ostringstream os; -+ for (int i = 0; i < iteration; i++) { -+ os << "Open Chinese Convert 開放中文轉換" << i << std::endl; -+ } -+ const std::string text = os.str(); -+ const std::string config_name = "s2t"; -+ const std::unique_ptr converter(Initialize(config_name)); -+ for (auto _ : state) { -+ Convert(converter.get(), text); -+ } -+} -+BENCHMARK_CAPTURE(BM_Convert, 100, 100)->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Convert, 1000, 1000)->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Convert, 10000, 10000)->Unit(benchmark::kMillisecond); -+BENCHMARK_CAPTURE(BM_Convert, 100000, 100000)->Unit(benchmark::kMillisecond); - - } // namespace opencc - diff --git a/debian/patches/series b/debian/patches/series index ad3c3d6..f24fc97 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -3,5 +3,3 @@ 0003-data-Explicitly-use-python3.patch 0004-no-remote-images-when-reading-docs-on-disk.patch 0005-Use-system-googletest.patch -0006-Fix-a-bug-in-the-calculation-of-DictGroup-keyMaxLeng.patch -0007-Fix-a-severe-performance-bug-in-Conversion-Convert-t.patch